/*
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Adapted from Apache Arrow.

// This module defines an abstract interface for iterating through pages in a
// Parquet column chunk within a row group. It could be extended in the future
// to iterate through all data pages in all chunks in a file.

#pragma once

#include <glog/logging.h>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>

#include "velox/dwio/parquet/writer/arrow/Statistics.h"
#include "velox/dwio/parquet/writer/arrow/Types.h"

namespace facebook::velox::parquet::arrow {

// TODO: Parallel processing is not yet safe because of memory-ownership
// semantics (the PageReader may or may not own the memory referenced by a
// page)
//
// TODO(wesm): In the future Parquet implementations may store the crc code
// in facebook::velox::parquet::thrift::PageHeader. parquet-mr currently does
// not, so we also skip it here, both on the read and write path
class Page {
 public:
  Page(const std::shared_ptr<::arrow::Buffer>& buffer, PageType::type type)
      : buffer_(buffer), type_(type) {}

  PageType::type type() const {
    return type_;
  }

  std::shared_ptr<::arrow::Buffer> buffer() const {
    return buffer_;
  }

  // @returns: a pointer to the page's data
  const uint8_t* data() const {
    return buffer_->data();
  }

  // @returns: the total size in bytes of the page's data buffer
  int32_t size() const {
    return static_cast<int32_t>(buffer_->size());
  }

 private:
  std::shared_ptr<::arrow::Buffer> buffer_;
  PageType::type type_;
};

/// \brief Base type for DataPageV1 and DataPageV2 including common attributes
class DataPage : public Page {
 public:
  int32_t num_values() const {
    return num_values_;
  }
  Encoding::type encoding() const {
    return encoding_;
  }
  int64_t uncompressed_size() const {
    return uncompressed_size_;
  }
  const EncodedStatistics& statistics() const {
    return statistics_;
  }
  /// Return the row ordinal within the row group to the first row in the data
  /// page. Currently it is only present from data pages created by ColumnWriter
  /// in order to collect page index.
  std::optional<int64_t> first_row_index() const {
    return first_row_index_;
  }

  virtual ~DataPage() = default;

 protected:
  DataPage(
      PageType::type type,
      const std::shared_ptr<::arrow::Buffer>& buffer,
      int32_t num_values,
      Encoding::type encoding,
      int64_t uncompressed_size,
      const EncodedStatistics& statistics = EncodedStatistics(),
      std::optional<int64_t> first_row_index = std::nullopt)
      : Page(buffer, type),
        num_values_(num_values),
        encoding_(encoding),
        uncompressed_size_(uncompressed_size),
        statistics_(statistics),
        first_row_index_(std::move(first_row_index)) {}

  int32_t num_values_;
  Encoding::type encoding_;
  int64_t uncompressed_size_;
  EncodedStatistics statistics_;
  /// Row ordinal within the row group to the first row in the data page.
  std::optional<int64_t> first_row_index_;
};

class DataPageV1 : public DataPage {
 public:
  DataPageV1(
      const std::shared_ptr<::arrow::Buffer>& buffer,
      int32_t num_values,
      Encoding::type encoding,
      Encoding::type definition_level_encoding,
      Encoding::type repetition_level_encoding,
      int64_t uncompressed_size,
      const EncodedStatistics& statistics = EncodedStatistics(),
      std::optional<int64_t> first_row_index = std::nullopt)
      : DataPage(
            PageType::DATA_PAGE,
            buffer,
            num_values,
            encoding,
            uncompressed_size,
            statistics,
            std::move(first_row_index)),
        definition_level_encoding_(definition_level_encoding),
        repetition_level_encoding_(repetition_level_encoding) {}

  Encoding::type repetition_level_encoding() const {
    return repetition_level_encoding_;
  }

  Encoding::type definition_level_encoding() const {
    return definition_level_encoding_;
  }

 private:
  Encoding::type definition_level_encoding_;
  Encoding::type repetition_level_encoding_;
};

class DataPageV2 : public DataPage {
 public:
  DataPageV2(
      const std::shared_ptr<::arrow::Buffer>& buffer,
      int32_t num_values,
      int32_t num_nulls,
      int32_t num_rows,
      Encoding::type encoding,
      int32_t definition_levels_byte_length,
      int32_t repetition_levels_byte_length,
      int64_t uncompressed_size,
      bool is_compressed = false,
      const EncodedStatistics& statistics = EncodedStatistics(),
      std::optional<int64_t> first_row_index = std::nullopt)
      : DataPage(
            PageType::DATA_PAGE_V2,
            buffer,
            num_values,
            encoding,
            uncompressed_size,
            statistics,
            std::move(first_row_index)),
        num_nulls_(num_nulls),
        num_rows_(num_rows),
        definition_levels_byte_length_(definition_levels_byte_length),
        repetition_levels_byte_length_(repetition_levels_byte_length),
        is_compressed_(is_compressed) {}

  int32_t num_nulls() const {
    return num_nulls_;
  }

  int32_t num_rows() const {
    return num_rows_;
  }

  int32_t definition_levels_byte_length() const {
    return definition_levels_byte_length_;
  }

  int32_t repetition_levels_byte_length() const {
    return repetition_levels_byte_length_;
  }

  bool is_compressed() const {
    return is_compressed_;
  }

 private:
  int32_t num_nulls_;
  int32_t num_rows_;
  int32_t definition_levels_byte_length_;
  int32_t repetition_levels_byte_length_;
  bool is_compressed_;
};

class DictionaryPage : public Page {
 public:
  DictionaryPage(
      const std::shared_ptr<::arrow::Buffer>& buffer,
      int32_t num_values,
      Encoding::type encoding,
      bool is_sorted = false)
      : Page(buffer, PageType::DICTIONARY_PAGE),
        num_values_(num_values),
        encoding_(encoding),
        is_sorted_(is_sorted) {}

  int32_t num_values() const {
    return num_values_;
  }

  Encoding::type encoding() const {
    return encoding_;
  }

  bool is_sorted() const {
    return is_sorted_;
  }

 private:
  int32_t num_values_;
  Encoding::type encoding_;
  bool is_sorted_;
};

} // namespace facebook::velox::parquet::arrow
